# -*- coding: utf-8 -*-

import math

import scrapy
import re
from apps.tax_policy.tax_policy.items import NetTaxPolicyItem as Item, urllib, time, urljoin
from apps.tax_policy.tax_policy.spiders.base_spider.base_tax_policy_spider import BaseTaxPolicySpider


class SichuanZigongPolicy(BaseTaxPolicySpider):
    """Spider for policy documents listed on Zigong (Sichuan) government portals.

    ``start_requests`` requests a fixed set of list pages (municipal site plus
    district/county sites). ``parse_list`` follows every article link on a
    list page and, when ``get_next`` is enabled, the link to the next list
    page. ``parse_detail`` yields one item per article page.
    """

    name = 'sichuan_zigong_policy'

    province = '四川省'
    city = '自贡市'
    # The county is not a class constant: it is attached per start URL in
    # start_requests and copied onto every item in parse_list.
    park = ''

    # Date format extracted from both list pages and detail pages.
    _DATE_PATTERN = r"(\d{4}-\d{1,2}-\d{1,2})"

    def __init__(self, get_next=False, **kwargs):
        """Create the spider.

        Args:
            get_next: Whether list pages should follow their "next page" link.
                Spider arguments given on the command line (``-a get_next=0``)
                arrive as strings, so textual values are interpreted instead
                of being tested for truthiness (``"0"`` is a truthy string).
            **kwargs: Forwarded unchanged to the base spider.
        """
        super().__init__(**kwargs)
        self.get_next = self._as_bool(get_next)
        self.logger.debug("get_next=%r (raw value: %r)", self.get_next, get_next)

    @staticmethod
    def _as_bool(value):
        """Interpret a spider argument as a boolean flag.

        Strings that spell a negative value ("", "0", "false", "no", "n",
        "off", "none"; case-insensitive, surrounding whitespace ignored) are
        False; every other string is True. Non-string values use ``bool()``.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'n', 'off', 'none')
        return bool(value)

    @classmethod
    def update_settings(cls, settings) -> None:
        """Merge ``custom_settings`` with this spider's overrides at spider priority.

        Args:
            settings: The settings object to update in place.
        """
        new_settings = {
            **(cls.custom_settings or {}),
            "DOWNLOAD_TIMEOUT": 200,
        }
        settings.setdict(new_settings, priority="spider")

    def start_requests(self):
        """Yield one request per configured list page.

        Each entry is ``[source, county, url]``; ``source`` and ``county`` are
        carried in ``meta['item']`` so ``parse_list`` can copy them onto every
        item found on that list.
        """
        for source, county, url in [
            ['自贡市人民政府', "", "http://www.zg.gov.cn/-5?p_p_id=shortlistmultiall_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_shortlistmultiall_WAR_CMSportlet_keywords=&_shortlistmultiall_WAR_CMSportlet_advancedSearch=false&_shortlistmultiall_WAR_CMSportlet_andOperator=true&_shortlistmultiall_WAR_CMSportlet_resetCur=false&_shortlistmultiall_WAR_CMSportlet_delta=75"],
            ['自贡市人民政府', "", "http://www.zg.gov.cn/-15?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_delta=75&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id07059766373398675&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_cur=1"],
            ['自贡市人民政府', "", "http://www.zg.gov.cn/szfbmwj?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id09031965824152703&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
            ['自贡市自流井区人民政府', "自流井区", "http://www.zlj.gov.cn/-44?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id08057895155643585&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
            ['自贡市自流井区人民政府', "自流井区", "http://www.zlj.gov.cn/-111?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id03042050009375863&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
            ['自贡市贡井区人民政府', "贡井区", "http://www.gj.gov.cn/zcwj?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id09645809014516296&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
            ['自贡市贡井区人民政府', "贡井区", "http://www.gj.gov.cn/xzgfxwj?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id09645809014516296&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
            ['自贡市大安区人民政府', "大安区", "http://www.zgda.gov.cn/-6?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id06731214362548316&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
            ['自贡市大安区人民政府', "大安区", "http://www.zgda.gov.cn/zcwj?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id02792969403379847&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
            ['自贡市沿滩区人民政府', "沿滩区", "http://www.zgyt.gov.cn/-66?p_p_id=shortlistmultiall_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_count=1&_shortlistmultiall_WAR_CMSportlet_keywords=&_shortlistmultiall_WAR_CMSportlet_advancedSearch=false&_shortlistmultiall_WAR_CMSportlet_andOperator=true&_shortlistmultiall_WAR_CMSportlet_resetCur=false&_shortlistmultiall_WAR_CMSportlet_delta=75"],
            ['自贡市沿滩区人民政府', "沿滩区", "http://www.zgyt.gov.cn/-51?p_p_id=shortlistmultiall_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_count=1&_shortlistmultiall_WAR_CMSportlet_keywords=&_shortlistmultiall_WAR_CMSportlet_advancedSearch=false&_shortlistmultiall_WAR_CMSportlet_andOperator=true&_shortlistmultiall_WAR_CMSportlet_resetCur=false&_shortlistmultiall_WAR_CMSportlet_delta=75"],
            ['自贡市荣县人民政府', "荣县", "http://www.rongzhou.gov.cn/xzgfxwj?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id07426184055405733&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
            ['自贡市富顺县人民政府', "富顺县", "http://www.fsxzf.gov.cn/zcwj?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id02381992921274968&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
            ['自贡市富顺县人民政府', "富顺县", "http://www.fsxzf.gov.cn/qtwj?p_p_id=cms2_WAR_CMSportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&p_p_col_id=column-2&p_p_col_pos=1&p_p_col_count=2&_cms2_WAR_CMSportlet_keywords=&_cms2_WAR_CMSportlet_advancedSearch=false&_cms2_WAR_CMSportlet_andOperator=true&_cms2_WAR_CMSportlet__inner_mainContainerId_=_cms2_WAR_CMSportlet_main_container_id04550113997483407&_cms2_WAR_CMSportlet_resetCur=false&_cms2_WAR_CMSportlet_delta=75"],
        ]:
            item = {'source': source, 'county': county}
            yield scrapy.Request(url, callback=self.parse_list, meta={'item': item}, dont_filter=True)

    def parse_list(self, response, **kwargs):
        """Parse a list page: follow article links and optionally the next page.

        Args:
            response: The list page response. ``meta['item']`` holds the
                per-list fields (``source``, ``county``), ``meta['page']`` the
                current page number (defaults to 1) and ``meta['retry_time']``
                the number of re-requests already made for this page.

        Yields:
            Requests for article pages (handled by ``parse_detail``), a
            re-request of this page when it looks incomplete, and, when
            ``self.get_next`` is set, a request for the next list page.
        """
        meta = response.meta
        prev_item = meta.get('item')
        retry_time = meta.get('retry_time', 0)
        # The page is treated as incomplete when the "records per page" pager
        # text is absent; re-request it, at most 3 times.
        if '每页显示记录数' not in response.text and retry_time < 3:
            meta['retry_time'] = retry_time + 1
            yield response.follow(response.url, callback=self.parse_list, meta=meta, dont_filter=True)
            return
        elem_list = response.xpath(
            "//ul//li//a[(contains(@href, '/web/guest') or contains(@href, 'articles') or @title) and not(starts-with(text(), '|'))]/ancestor::li[string-length(string(.))>=9]"
        )

        for elem in elem_list:
            href = elem.xpath("""./a/@href""").get()
            # Test the raw href: urljoin() of an empty value returns the base
            # URL, which would hide a missing link from a later emptiness check.
            if not href:
                continue
            source_url = response.urljoin(href)
            if '.htm' not in source_url and '.shtm' not in source_url:
                continue
            if 'relativeInfo' in source_url:
                continue
            item = Item()
            item['source_url'] = source_url
            # Kept as a fallback for detail pages that expose no publish date.
            item['publish_date'] = elem.xpath("""./span/text()""").re_first(self._DATE_PATTERN)
            if prev_item is not None:
                for key, value in prev_item.items():
                    item[key] = value
            self.logger.debug("%s -> %s", response.url, source_url)
            yield response.follow(source_url, callback=self.parse_detail, meta={'item': item})
        if self.get_next:
            page = response.meta.get('page', 1)
            # Pager links may wrap the target URL in quotes inside a script
            # call, hence the quoted-string extraction.
            next_url = response.xpath(
                """//a[text()='下页' or ./text()='下页>' or text()='»' or contains(./text(), '下一页') or text()='下一页»']/@href""").re_first("'(.*)'")
            if not next_url:
                # Fallback: the first link after the bold current-page number.
                next_url = response.xpath(f"""//b[text()='{page}']/following::a/@href""").get()
            if next_url and next_url != 'javascript:void(0);' and response.urljoin(next_url) != response.url:
                self.logger.debug("%s next %s", response.url, next_url)
                yield response.follow(next_url, callback=self.parse_list, meta={'item': prev_item, "page": page + 1})
            else:
                self.logger.debug("%s not next_url", response.url)

    def parse_detail(self, response, **kwargs):
        """Build the final item from an article page.

        Args:
            response: The article page response; ``meta['item']`` is the
                partially filled item created in ``parse_list`` (a fresh item
                is used when it is absent).

        Yields:
            The item with title, publish date, content and region fields set.
        """
        item = response.meta.get('item')
        if item is None:
            item = Item()
        # Fall back to the document <title>; it is matched with "//" because
        # an absolute "/title" path never matches inside an HTML document.
        item['title'] = response.xpath("""string(//meta[@name='ArticleTitle']/@content)""").get() or response.xpath("//title/text()").get()
        detail_date = response.xpath("//span[contains(.//text(), '发布时间')]/following-sibling::span//text()").re_first(self._DATE_PATTERN)
        # Prefer the date shown on the article page; otherwise keep the date
        # taken from the list page instead of overwriting it with None.
        item['publish_date'] = detail_date or item.get('publish_date')
        self.logger.debug("publish_date=%s", item['publish_date'])
        item['content'] = response.xpath(""".""").get()
        item['province'] = self.province
        item['city'] = self.city
        item['park'] = self.park
        yield item


if __name__ == "__main__":
    # Convenience entry point: run this spider through the Scrapy command
    # line, passing get_next=0 as a spider argument.
    from scrapy.cmdline import execute

    crawl_command = "scrapy crawl sichuan_zigong_policy -a get_next=0"
    execute(crawl_command.split())
